Brian M. Schilder, Bioinformatician II
root = "./"
# Import functions
source(file.path(root, "./general_functions.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 4$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] "400"
##
## $resolution
## [1] "0.2"
##
## $resultsPath
## [1] "Results/subsetGenes-protein_coding__subsetCells-400__Resolution-0.2__perplexity-30__nCores-4"
##
## $nCores
## [1] 4
##
## $perplexity
## [1] "30"
load(file.path(resultsPath, "scRNAseq_results.RData"))__Results/subsetGenes-protein_coding__subsetCells-400__Resolution-0.2__perplexity-30nCores-4
library(Seurat)
library(dplyr)
library(gridExtra)
library(knitr)
library(plotly)
library(ggplot2)
library(viridis)
library(reshape2)
library(shiny)
library(ggrepel)
library(DT)
library(ComplexHeatmap); #BiocManager::install("ComplexHeatmap")
## Install Bioconductor
# if (!requireNamespace("BiocManager"))
# install.packages("BiocManager")
library(biomaRt) # BiocManager::install(c("biomaRt"))
library(DESeq2) # BiocManager::install(c("DESeq2"))
library(enrichR) #BiocManager::install("enrichR")
library(monocle) #BiocManager::install("monocle")
# BiocManager::install("DelayedMatrixStats")
# BiocManager::install("org.Mm.eg.db")
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
sessionInfo()## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] splines stats4 parallel grid stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] DESeq2_1.22.2 SummarizedExperiment_1.12.0
## [3] DelayedArray_0.8.0 BiocParallel_1.16.5
## [5] matrixStats_0.54.0 GenomicRanges_1.34.0
## [7] GenomeInfoDb_1.18.1 RColorBrewer_1.1-2
## [9] bindrcpp_0.2.2 garnett_0.1.1
## [11] org.Hs.eg.db_3.7.0 AnnotationDbi_1.44.0
## [13] IRanges_2.16.0 S4Vectors_0.20.1
## [15] monocle_2.10.1 DDRTree_0.1.5
## [17] irlba_2.3.3 VGAM_1.0-6
## [19] Biobase_2.42.0 BiocGenerics_0.28.0
## [21] enrichR_1.0 biomaRt_2.38.0
## [23] ComplexHeatmap_1.20.0 ggrepel_0.8.0
## [25] reshape2_1.4.3 viridis_0.5.1
## [27] viridisLite_0.3.0 DT_0.5.1
## [29] shiny_1.2.0 plotly_4.8.0
## [31] knitr_1.21 gridExtra_2.3
## [33] dplyr_0.7.8 Seurat_2.3.4
## [35] Matrix_1.2-15 cowplot_0.9.4
## [37] ggplot2_3.1.0
##
## loaded via a namespace (and not attached):
## [1] reticulate_1.10 R.utils_2.7.0 tidyselect_0.2.5
## [4] RSQLite_2.1.1 htmlwidgets_1.3 combinat_0.0-8
## [7] trimcluster_0.1-2.1 docopt_0.6.1 Rtsne_0.15
## [10] munsell_0.5.0 codetools_0.2-16 ica_1.0-2
## [13] withr_2.1.2 colorspace_1.4-0 fastICA_1.2-1
## [16] rstudioapi_0.9.0 ROCR_1.0-7 robustbase_0.93-3
## [19] dtw_1.20-1 gbRd_0.4-11 Rdpack_0.10-1
## [22] labeling_0.3 lars_1.2 slam_0.1-44
## [25] GenomeInfoDbData_1.2.0 bit64_0.9-7 pheatmap_1.0.12
## [28] xfun_0.4 diptest_0.75-7 R6_2.3.0
## [31] locfit_1.5-9.1 hdf5r_1.0.1 flexmix_2.3-14
## [34] bitops_1.0-6 assertthat_0.2.0 promises_1.0.1
## [37] SDMTools_1.1-221 scales_1.0.0 nnet_7.3-12
## [40] gtable_0.2.0 npsurv_0.4-0 rlang_0.3.1
## [43] genefilter_1.64.0 GlobalOptions_0.1.0 lazyeval_0.2.1
## [46] acepack_1.4.1 checkmate_1.9.1 yaml_2.2.0
## [49] crosstalk_1.0.0 backports_1.1.3 httpuv_1.4.5.1
## [52] Hmisc_4.2-0 tools_3.5.1 gplots_3.0.1.1
## [55] proxy_0.4-22 ggridges_0.5.1 Rcpp_1.0.0
## [58] plyr_1.8.4 zlibbioc_1.28.0 base64enc_0.1-3
## [61] progress_1.2.0 purrr_0.3.0 RCurl_1.95-4.11
## [64] densityClust_0.3 prettyunits_1.0.2 rpart_4.1-13
## [67] pbapply_1.4-0 GetoptLong_0.1.7 zoo_1.8-4
## [70] cluster_2.0.7-1 magrittr_1.5 data.table_1.12.0
## [73] circlize_0.4.5 lmtest_0.9-36 RANN_2.6.1
## [76] mvtnorm_1.0-8 fitdistrplus_1.0-14 hms_0.4.2
## [79] lsei_1.2-0 mime_0.6 evaluate_0.12
## [82] xtable_1.8-3 XML_3.98-1.17 mclust_5.4.2
## [85] sparsesvd_0.1-4 shape_1.4.4 HSMMSingleCell_1.2.0
## [88] compiler_3.5.1 tibble_2.0.1 KernSmooth_2.23-15
## [91] crayon_1.3.4 R.oo_1.22.0 htmltools_0.3.6
## [94] segmented_0.5-3.0 later_0.8.0 Formula_1.2-3
## [97] snow_0.4-3 geneplotter_1.60.0 tidyr_0.8.2
## [100] DBI_1.0.0 MASS_7.3-51.1 fpc_2.1-11.1
## [103] R.methodsS3_1.7.1 gdata_2.18.0 metap_1.1
## [106] bindr_0.1.1 igraph_1.2.2 pkgconfig_2.0.2
## [109] foreign_0.8-71 foreach_1.4.4 annotate_1.60.0
## [112] XVector_0.22.0 bibtex_0.4.2 stringr_1.4.0
## [115] digest_0.6.18 tsne_0.1-3 rmarkdown_1.11
## [118] htmlTable_1.13.1 kernlab_0.9-27 gtools_3.8.1
## [121] modeltools_0.2-22 rjson_0.2.20 nlme_3.1-137
## [124] jsonlite_1.6 limma_3.38.3 pillar_1.3.1
## [127] lattice_0.20-38 httr_1.4.0 DEoptimR_1.0-8
## [130] survival_2.43-3 glue_1.3.0 qlcMatrix_0.9.7
## [133] FNN_1.1.2.2 png_0.1-7 prabclus_2.2-7
## [136] iterators_1.0.10 bit_1.1-14 class_7.3-15
## [139] stringi_1.2.4 mixtools_1.1.0 blob_1.1.1
## [142] doSNOW_1.0.16 latticeExtra_0.6-28 caTools_1.17.1.1
## [145] memoise_1.1.0 ape_5.2
print(paste("Seurat ", packageVersion("Seurat")))## [1] "Seurat 2.3.4"
enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
"GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018",
"Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")for (clust in unique(DAT.markers.sig$cluster)){
cat('\n')
cat("### Cluster ",clust,"{.tabset .tabset-fade}\n")
geneList <- subset(DAT.markers.sig, cluster==clust)$gene %>% as.character()
results <- enrichr(genes = geneList, databases = enrichr_dbs )
for (db in enrichr_dbs){
cat('\n')
cat("#### ",db,"\n")
createDT_html(subset(results[[db]], Adjusted.P.value<=0.05), paste("Enrichr Results: ",db,"Cluster ", clust))
cat('\n')
}
cat('\n')
} ## Error in unique(DAT.markers.sig$cluster): object 'DAT.markers.sig' not found
eigengenes <- read.delim(file.path(root,"Data/bulkMonocytes_WGCNAmodules_geneMembership.txt"), row.names = NULL)
modules <- read.delim(file.path(root,"Data/bulkMonocytes_WGCNAmodules_geneModules.txt"), row.names = NULL, sep = "",
col.names = c("Ensembl","moduleColors"))
modules <- base::merge(eigengenes, modules,by="Ensembl" )
for (mod in unique(modules$moduleColors)){
cat('\n')
cat("### Module ",mod,"{.tabset .tabset-fade}\n")
geneList <- subset(modules, moduleColors==mod)$symbol %>% as.character()
results <- enrichr(genes = geneList, databases = enrichr_dbs )
for (db in enrichr_dbs){
cat('\n')
cat("#### ",db,"\n")
createDT_html(subset(results[[db]], Adjusted.P.value<=0.05), paste("Enrichr Results:",db,"Module", mod))
cat('\n')
}
cat('\n')
}Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
Determine whether each of the clusters in scRNA-seq data are enriched for WGCNA eigengenes (a weighted vector of all genes representing each co-expression module).
#Get the average expression of every gene in each cluster
allGenes <- get_markerDF(DAT, markerList = row.names(DAT@scale.data), meta_vars = c("post_clustering", "barcode") )
clusterGeneAvg <- allGenes %>% group_by(post_clustering, Gene) %>% summarise(meanExp = mean(Expression))
eigengenes_filt <- subset(eigengenes,symbol %in% unique(clusterGeneAvg$Gene))
clusts_by_mods <- base::merge(clusterGeneAvg[c("Gene","meanExp")], eigengenes_filt[c("symbol", modName)],
by.x="Gene", by.y="symbol")
cor.test()
corrplot()
heatmap.2
f <- function(module){
eigengene <- eigengenes[paste0("MM", mod)]
means <- tapply(eigengenes, DAT@meta.data$post_clustering, mean, na.rm = T)
return(means)
}
modules <- c("blue", "brown", "green", "turquoise", "yellow")
plotdat <- sapply(modules, f)
matplot(plotdat, col = modules, type = "l", lwd = 2, xaxt = "n", xlab = "Seurat Cluster",
ylab = "WGCNA Module Eigengene")
axis(1, at = 1:16, labels = 0:15)
matpoints(plotdat, col = modules, pch = 21)library(RRHO) #BiocManager::install("RRHO")
# list.length <- 100
# list.names <- paste('Gene',1:list.length, sep='')
# gene.list1<- data.frame(list.names, sample(100))
# gene.list2<- data.frame(list.names, sample(100))
for (clust in unique(DAT.markers.sig)){
# Compare each cluster
subClust <- subset(DAT.markers.sig, cluster==clust) %>% arrange(desc(avg_logFC))
for (mod in unique(modules$moduleColors)){
# Sort genes by module membership
modName <-paste("MM",mod,sep="")
subMod <- subset(modules, moduleColors==mod) %>% arrange(desc(eval(parse(text = modName))))
maxGenes <- min(length(subClust$gene), subMod$symbol) %>% as.numeric()
list1 <- subClust[1:maxGenes, c("gene","FC")] %>% dplyr::rename(value=FC)
list2 <- subMod[1:maxGenes, c("symbol",modName)] %>% dplyr::rename(gene=symbol, value=modName)
RRHO_path <-file.path("RRHO_results",paste(paste("Cluster",clust,sep=""),"vs",modName,sep="_"))
dir.create(RRHO_path,recursive = T, showWarnings = F)
RRHO_results <- RRHO(list1=list1, list2=list2,
labels = c(paste("Cluster",clust,sep="_"), paste("Module",mod,sep="_")),
plots = T, alternative = "enrichment", outputdir = RRHO_path, BY=TRUE
)
lattice::levelplot(RRHO_results$hypermat)
# Pval testing
pval.testing <- pvalRRHO(RRHO_results, 50)
pval.testing$pval
xs<- seq(0, 10, length=100)
plot(Vectorize(pval.testing$FUN.ecdf)(xs)~xs, xlab='-log(pvalue)', ylab='ECDF', type='S')
lattice::levelplot(RRHO_results$hypermat.by)
}
} save.image(file.path(resultsPath, "scRNAseq_results.RData"))